Data preparations

load("XSTSF_production.RData")
source('functions.R')

datasets

f0_all_ct <- f0_all_pre %>% filter(focus_condition == 'ct' ) %>% 
  group_by(speaker) %>% 
  mutate(norm_f0 = scale(log(f0))) %>% 
  ungroup() %>% 
  mutate(
    time = as.numeric(time),
    syllable_no = case_when(
    time > 0 & time < 11 ~ 1,
    time > 10 & time < 21 ~ 2,
    time > 20 & time < 31 ~ 3
  ),
  sync_tone1 = ifelse(sync_tone1 == 'RF', 'LHL', sync_tone1),
  sync_tone2 = ifelse(sync_tone2 == 'RF', 'LHL', sync_tone2),
  sync_tone3 = ifelse(sync_tone3 == 'RF', 'LHL', sync_tone3))

f0_tri_ct <- f0_all_ct %>% 
  filter(diortri == 'tri' & sandhi_tone != 'outlier')  %>% 
  mutate(sync_tone23 = paste0(sync_tone2, '_', sync_tone3),
         hist_tone23 = paste0(hist_tone2, '_', hist_tone3),
         hist_tone23_mapped = gsub("yinping", "Ia", 
                         gsub("yangping", "Ib", 
                         gsub("yinshang", "IIa", 
                         gsub("yangshang", "IIb", 
                         gsub("yinqu", "IIIa", 
                         gsub("yangqu", "IIIb", hist_tone23)))))),
         all_tone = paste(sync_tone1, hist_tone23_mapped, sep = "_"),
         sandhi_tone = ifelse(sandhi_tone == 'HHL', 'MML', 
                              ifelse(sandhi_tone == 'LHL', 'MHL', sandhi_tone)))
  
f0_tri_ct_yp <- f0_tri_ct %>% filter(hist_tone1 == 'yinping') 
f0_tri_ct_yap <- f0_tri_ct %>% filter(hist_tone1 == 'yangping')
f0_tri_ct_ys <- f0_tri_ct %>% filter(hist_tone1 == 'yinshang')
f0_tri_ct_yas <- f0_tri_ct %>% filter(hist_tone1 == 'yangshang')

yinping-initial

Monosyllabic tones

f0_tri_ct_yp %>%
  distri_prop(hist_tone1, sync_tone1, syntax)+
  xlab("historical tone")+
  labs(fill = "synchronic tone")

f0_tri_ct_yp %>%
  filter(startsWith(hist_tone2, "yin")) %>%
  distri_prop(hist_tone2, sync_tone2, syntax)+
  xlab("historical tone")+
  labs(fill = "synchronic tone")

f0_tri_ct_yp %>%
  filter(startsWith(hist_tone3, "yin")) %>%
  distri_prop(hist_tone3, sync_tone3, syntax)+
  xlab("historical tone")+
  labs(fill = "synchronic tone")

f0_tri_ct_yp %>%
  filter(startsWith(hist_tone2, "yang")) %>%
  distri_prop(hist_tone2, sync_tone2, syntax)+
  xlab("historical tone")+
  labs(fill = "synchronic tone")

f0_tri_ct_yp %>%
  filter(startsWith(hist_tone3, "yang")) %>%
  distri_prop(hist_tone3, sync_tone3, syntax)+
  xlab("historical tone")+
  labs(fill = "synchronic tone")

Sandhi categorisation

Auditory categorisation

unique(f0_tri_ct_yp$sandhi_tone)
## [1] "HLM" "MHL" "HML" "MMH" "MML"
p_cluster(f0_tri_ct_yp, sandhi_tone)
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

k-means clustering

# data preparation
f0_tri_ct_yp_kmeans <- f0_tri_ct_yp %>% 
  select(-diortri, -syllable_no, -focus_no, -f0) %>% 
  spread(time, norm_f0)

# k-means clustering
cluster_model <- k_means_clustering(f0_tri_ct_yp_kmeans)
kml(cluster_model, nbClusters = 2:10) 
##  ~ Fast KmL ~
## ***************************************************************************************************S
## 100 ********************************************************************************S
kml::plot(cluster_model, 2, parTraj=parTRAJ(col="clusters"))

kml::plot(cluster_model, 3, parTraj=parTRAJ(col="clusters"))

kml::plot(cluster_model, 4, parTraj=parTRAJ(col="clusters"))

kml::plot(cluster_model, 5, parTraj=parTRAJ(col="clusters"))

plotAllCriterion(cluster_model)

# get cluster results
f0_tri_ct_yp_kmeans <- f0_tri_ct_yp_kmeans %>% 
  mutate(cluster3 = getClusters(cluster_model, 3),
         cluster5 = getClusters(cluster_model, 5))

f0_tri_ct_yp_kmeans <- wide_to_long(f0_tri_ct_yp_kmeans) %>% 
  mutate(syllable_no = ifelse(time < 11, '1', 
                              ifelse(time < 21, '2', '3'))) %>% 
  mutate(cluster3_reorder = case_when(cluster3 == 'B' ~ 'A',
                                      cluster3 == 'C' ~ 'B',
                                      cluster3 == 'A' ~ 'C'),
         cluster5_reorder = case_when(cluster5 == 'C' ~ 'A',
                                      cluster5 == 'A' ~ 'B',
                                      cluster5 == 'D' ~ 'C',
                                      cluster5 == 'E' ~ 'D',
                                      cluster5 == 'B' ~ 'E'))

k-means cluster visualisation

f0_tri_ct_yp_kmeans <- f0_tri_ct_yp_kmeans %>% 
  mutate(cluster3_reorder = case_when(cluster3 == 'B' ~ 'A',
                                      cluster3 == 'C' ~ 'B',
                                      cluster3 == 'A' ~ 'C'),
         cluster5_reorder = case_when(cluster5 == 'C' ~ 'A',
                                      cluster5 == 'A' ~ 'B',
                                      cluster5 == 'D' ~ 'C',
                                      cluster5 == 'E' ~ 'D',
                                      cluster5 == 'B' ~ 'E'))

p_kmeans3 <- p_cluster(f0_tri_ct_yp_kmeans, cluster3_reorder);p_kmeans3

p_kmeans5 <- p_cluster(f0_tri_ct_yp_kmeans, cluster5_reorder);p_kmeans5

heatmap distribution

heatmap_df <- heatmap_data(f0_tri_ct_yp_kmeans, cluster3_reorder) 
p_htmap3 <- compare_cluster(heatmap_df, 'cluster3_reorder'); p_htmap3

heatmap_df <- heatmap_data(f0_tri_ct_yp_kmeans, cluster5_reorder) 
p_htmap5 <- compare_cluster(heatmap_df, 'cluster5_reorder'); p_htmap5

Comparisons with disyllabic sandhi

H-M-L system validation

# monosyllable
unique(f0_all_ct$diortri) # check diortri value for monosyllables
## [1] NA    "di"  "tri"
f0_mono_ct <- f0_all_ct %>% 
  filter(is.na(diortri) == TRUE) %>% 
  mutate(syllable_no = 1) # select monosyllabic data
unique(f0_mono_ct$token) # check if dataset is correct
##  [1] "青" "书" "椒" "苦" "包" "手" "瓜" "樱" "机" "花" "海" "菜" "带" "草" "桃"
## [16] "莓" "水" "房" "牛" "链" "豆" "扁" "黄" "梅" "皮" "袄" "油" "杨" "鞋" "小"
## [31] "新" "车" "店" "门" "树" "路" "老" "布" "绳" "开" "修" "锁" "采" "买" "造"
## [46] "茶" "纸" "饭" "船" "工" "人" "籽" "厂" "毛"
unique(f0_mono_ct$citation_tone) # check tone inventory 
## [1] "HH" "HL" "RF" "LH"
unique(f0_mono_ct$syllable_no) 
## [1] 1
p_cluster(f0_mono_ct, citation_tone)
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

p_cluster(f0_mono_ct, citation_tone, 'speaker')
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

# S6's mono tones
f0_mono_s6 <- filter(f0_mono_ct, speaker == 'S6')
p_cluster(f0_mono_s6, citation_tone)
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

plotting sandhi patterns

# yinping-initial disyllabic dataset
f0_di_ct_lcmh_h <- f0_all_ct %>% 
  filter(syntax %in% c('L', 'M') & diortri == 'di' & sync_tone1 %in% c('HH', 'HL')) %>% 
  mutate(sandhi_tone = case_when(sandhi_tone == 'HLLM' ~ 'HHML',
                                 sandhi_tone == 'HMML' ~ 'HHML',
                                 sandhi_tone == 'LLHL' ~ 'LLRF', 
                                 .default = sandhi_tone)) %>% 
  filter(!ind_no %in% c('S2_1_ct', 'S2_11_ct', 'S2_27_ct', 
                        'S3_5_ct', 'S3_19_ct', 'S5_27_ct')) %>% 
  filter(is.na(sandhi_tone) == FALSE,
         sandhi_tone != 'HHHH') 

unique(f0_di_ct_lcmh_h$sandhi_tone) # check sandhi categories
## [1] "HHML" "MHHL" "MMMH"
# plotting
p_cluster_cont(f0_di_ct_lcmh_h, sandhi_tone)+
  labs(title = 'Disyllabic sandhi')+
  theme(plot.title = element_text(hjust = 0.5, size = 20, face = "bold"))

p_cluster_cont(f0_tri_ct_yp, sandhi_tone)+
  scale_color_manual(values = c("#4477AA", "#66CCEE", "#DDCC77", "#CC6677", "#9B72C7"))+
  labs(title = 'Trisyllabic sandhi')+
  theme(plot.title = element_text(hjust = 0.5, size = 20, face = "bold"))

# check individual contours
# ggplotly(draw_by(f0_di_ct_lcmh_h, 'sandhi_tone'), tooltip = c('text', 'x'))

direct comparions between di & tri sandhi

# direct comparisons of similar sandhi contours
f0_tri_yp_comp <- f0_tri_ct_yp %>% 
  #filter(sandhi_tone %in% c('HLM', 'MHL', 'MMH')) %>% 
  select(-sync_tone23, -hist_tone23, -hist_tone23_mapped, -all_tone) %>% 
  mutate(sandhi_category = case_when(sandhi_tone == 'HLM' ~ 'Fall',
                                     sandhi_tone == 'MHL' ~ 'Rise-fall',
                                     sandhi_tone == 'MMH' ~ 'Rise',
                                     .default = sandhi_tone),
         sandhi_tone = paste0('tri: ', sandhi_tone))

f0_di_yp_comp <- f0_di_ct_lcmh_h %>% 
  mutate(sandhi_category = case_when(sandhi_tone == 'HHML' ~ 'Fall',
                                     sandhi_tone == 'MHHL' ~ 'Rise-fall',
                                     .default = 'Rise'),
         sandhi_tone = paste0('di: ', sandhi_tone))

f0_yp_comp <- rbind(f0_tri_yp_comp %>% 
                      filter(sandhi_tone %in% c('tri: HLM', 'tri: MHL', 'tri: MMH')),
                    f0_di_yp_comp) %>% 
  mutate(sandhi_tone = factor(sandhi_tone, 
                              levels = c('di: HHML', 'tri: HLM', 
                                         'di: MMMH', 'tri: MMH',
                                         'di: MHHL', 'tri: MHL')))
  
p_cluster_cont(f0_yp_comp, sandhi_tone, 'sandhi_category')+
  scale_color_manual(values = c("#4477AA", "#88D3E9", "#DDCC77", "#AA9944","#CC6677", "#882255"))+
  labs(title = 'Comparisons of di- and tri-syllabic sandhi')+
  theme(plot.title = element_text(hjust = 0.5, size = 20, face = "bold"))

overall distribution comparison

f0_yp_comp1 <- rbind(f0_tri_yp_comp,f0_di_yp_comp) %>% 
  mutate(sandhi_tone = factor(sandhi_tone, 
                              levels = c('di: HHML', 'tri: HLM', 
                                         'di: MMMH', 'tri: MMH',
                                         'di: MHHL', 'tri: MHL')),
         sandhi_category = factor(sandhi_category,
                                  levels = c('Fall', 'Rise', 'Rise-fall',
                                             'HML', 'MML')))

distri_prop2(f0_yp_comp1, diortri, sandhi_category)

  scale_fill_manual(values = c("#4477AA", "#DDCC77", "#CC6677"))
## <ggproto object: Class ScaleDiscrete, Scale, gg>
##     aesthetics: fill
##     axis_order: function
##     break_info: function
##     break_positions: function
##     breaks: waiver
##     call: call
##     clone: function
##     dimension: function
##     drop: TRUE
##     expand: waiver
##     get_breaks: function
##     get_breaks_minor: function
##     get_labels: function
##     get_limits: function
##     guide: legend
##     is_discrete: function
##     is_empty: function
##     labels: waiver
##     limits: NULL
##     make_sec_title: function
##     make_title: function
##     map: function
##     map_df: function
##     n.breaks.cache: NULL
##     na.translate: TRUE
##     na.value: grey50
##     name: waiver
##     palette: function
##     palette.cache: NULL
##     position: left
##     range: environment
##     rescale: function
##     reset: function
##     scale_name: manual
##     train: function
##     train_df: function
##     transform: function
##     transform_df: function
##     super:  <ggproto object: Class ScaleDiscrete, Scale, gg>

Distribution analysis

trisyllabic sandhi patterns

f0_tri_ct_yp_12mh <- f0_tri_ct_yp %>% filter(syntax == "1+2MH")
f0_tri_ct_yp_21ll <- f0_tri_ct_yp %>% filter(syntax == "2+1LL")
f0_tri_ct_yp_21vl <- f0_tri_ct_yp %>% filter(syntax == "2+1VL")

# overall
f0_tri_ct_yp %>% distri_prop2(syntax, sandhi_tone)

f0_tri_ct_yp %>% distri_prop(hist_tone23_mapped, sandhi_tone, syntax)

f0_tri_ct_yp %>% distri_prop2(speaker, sandhi_tone)

# 1+2MH
f0_tri_ct_yp_12mh %>% 
 filter(sync_tone1 == 'HH') %>% 
  distri_prop2(all_tone, sandhi_tone)

f0_tri_ct_yp_12mh %>% 
 filter(sync_tone1 == 'HL') %>% 
  distri_prop2(hist_tone23_mapped, sandhi_tone)

# 2+1 LL
#distri_prop2(f0_tri_ct_yp_21ll, sync_tone1, sandhi_tone)
#distri_prop(f0_tri_ct_yp_21ll, hist_tone3, sandhi_tone, hist_tone2)
f0_tri_ct_yp_21ll %>% 
 filter(sync_tone1 == 'HH') %>% 
  distri_prop2(hist_tone23_mapped, sandhi_tone)

f0_tri_ct_yp_21ll %>% 
 filter(sync_tone1 == 'HL') %>% 
  distri_prop2(hist_tone23_mapped, sandhi_tone)

# 2+1VL
#distri_prop2(f0_tri_ct_yp_21vl, sync_tone1, sandhi_tone)
#distri_prop(f0_tri_ct_yp_21vl, hist_tone3, sandhi_tone, hist_tone2
f0_tri_ct_yp_21vl %>% 
 filter(sync_tone1 == 'HH') %>% 
  distri_prop2(hist_tone23_mapped, sandhi_tone)

f0_tri_ct_yp_21vl %>% 
 filter(sync_tone1 == 'HL') %>% 
  distri_prop2(hist_tone23_mapped, sandhi_tone)

f0_tri_ct_yp_12mh %>% filter(sandhi_tone == 'HML') %>% select(ind_no) %>% distinct()
## # A tibble: 8 × 1
##   ind_no  
##   <chr>   
## 1 S1_27_ct
## 2 S1_35_ct
## 3 S3_27_ct
## 4 S3_35_ct
## 5 S4_27_ct
## 6 S4_35_ct
## 7 S8_27_ct
## 8 S8_35_ct
f0_tri_ct_yp_21ll %>% filter(sandhi_tone == 'HML') %>% select(ind_no) %>% distinct()
## # A tibble: 3 × 1
##   ind_no  
##   <chr>   
## 1 S8_23_ct
## 2 S8_38_ct
## 3 S8_44_ct
f0_tri_ct_yp_21vl %>% filter(sandhi_tone == 'HML') %>% select(ind_no) %>% distinct()
## # A tibble: 3 × 1
##   ind_no  
##   <chr>   
## 1 S1_43_ct
## 2 S4_43_ct
## 3 S8_43_ct

build model

f0_tri_ct_yp <- f0_tri_ct_yp %>% 
  mutate(sandhi_tone = as.factor(sandhi_tone),
         sync_tone1 = as.factor(sync_tone1),
         hist_tone2 = as.factor(hist_tone2),
         hist_tone3 = as.factor(hist_tone3),
         syntax = as.factor(syntax),
         speaker = as.factor(speaker))

model <- multinom(sandhi_tone ~ sync_tone1 + hist_tone2 + hist_tone3 + syntax + speaker, data = f0_tri_ct_yp)
summary(model)

model_a1 <- multinom(sandhi_tone ~ sync_tone1 + hist_tone2 + hist_tone3 + speaker*syntax, data = f0_tri_ct_yp)
anova(model_a1, model, test = "Chisq")

model_1 <- multinom(sandhi_tone ~ hist_tone2 + hist_tone3 + syntax + speaker, data = f0_tri_ct_yp)
anova(model_1, model, test = "Chisq")

model_2 <- multinom(sandhi_tone ~ sync_tone1 + hist_tone3 + syntax + speaker, data = f0_tri_ct_yp)
anova(model_2, model, test = "Chisq")

model_3 <- multinom(sandhi_tone ~ sync_tone1 + hist_tone2 + syntax + speaker, data = f0_tri_ct_yp)
anova(model_3, model, test = "Chisq")

model_4 <- multinom(sandhi_tone ~ sync_tone1 + hist_tone2 + hist_tone3 + syntax, data = f0_tri_ct_yp)
anova(model_4, model, test = "Chisq")

model_5 <- multinom(sandhi_tone ~ sync_tone1 + hist_tone2 + hist_tone3 + speaker, data = f0_tri_ct_yp)
anova(model_5, model, test = "Chisq")

yinshang-initial

unique(f0_tri_ct_ys$sandhi_tone)
## [1] "MHL" "HLM" "HML" "MMH" "MMM"
p_cluster(f0_tri_ct_ys, sandhi_tone)
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Warning: Removed 9 rows containing non-finite values (`stat_summary()`).
## Warning: Removed 9 rows containing missing values (`geom_line()`).

monosyllable monosyllabic tones

f0_tri_ct_ys %>%
  distri_prop(hist_tone1, sync_tone1, syntax)+
  labs(fill = "synchronic tone")

f0_tri_ct_ys %>%
  filter(startsWith(hist_tone2, "yin")) %>%
  distri_prop(hist_tone2, sync_tone2, syntax)+
  labs(fill = "synchronic tone")

f0_tri_ct_ys %>%
  filter(startsWith(hist_tone3, "yin")) %>%
  distri_prop(hist_tone3, sync_tone3, syntax)+
  labs(fill = "synchronic tone")

f0_tri_ct_ys %>%
  filter(startsWith(hist_tone2, "yang")) %>%
  distri_prop(hist_tone2, sync_tone2, syntax)+
  labs(fill = "synchronic tone")

f0_tri_ct_ys %>%
  filter(startsWith(hist_tone3, "yang")) %>%
  distri_prop(hist_tone3, sync_tone3, syntax)+
  labs(fill = "synchronic tone")

trisyllabic sandhi patterns

unique(f0_tri_ct_ys$syntax)
## [1] "1+2MH" "1+2VO" "2+1LL"
f0_tri_ct_ys_12mh <- f0_tri_ct_ys %>% filter(syntax == "1+2MH")
f0_tri_ct_ys_21ll <- f0_tri_ct_ys %>% filter(syntax == "2+1LL")
f0_tri_ct_ys_12vo <- f0_tri_ct_ys %>% filter(syntax == "1+2VO")

# 1+2MH
f0_tri_ct_ys_12mh %>% 
 filter(sync_tone1 == 'HH') %>% 
  distri_prop2(hist_tone23_mapped, sandhi_tone)

f0_tri_ct_ys_12mh %>% 
 filter(sync_tone1 == 'HL') %>% 
  distri_prop2(hist_tone23_mapped, sandhi_tone)

# 2+1 LL
f0_tri_ct_ys_21ll %>% 
 filter(sync_tone1 == 'HH') %>% 
  distri_prop2(hist_tone23_mapped, sandhi_tone)

f0_tri_ct_ys_21ll %>% 
 filter(sync_tone1 == 'HL') %>% 
  distri_prop2(hist_tone23_mapped, sandhi_tone)

# 1+2VO
f0_tri_ct_ys_12vo %>% 
 filter(sync_tone1 == 'HH') %>% 
  distri_prop2(hist_tone23_mapped, sandhi_tone)

f0_tri_ct_ys_12vo %>% 
 filter(sync_tone1 == 'HL') %>% 
  distri_prop2(hist_tone23_mapped, sandhi_tone)

## yangshang-initial

unique(f0_tri_ct_yas$sandhi_tone)
## [1] "MHL" "LLH" "LMH"
p_cluster(f0_tri_ct_yas, sandhi_tone)
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Warning: Removed 1 rows containing non-finite values (`stat_summary()`).

monosyllabic tones

f0_tri_ct_yas %>%
  distri_prop(hist_tone1, sync_tone1, syntax)+
  labs(fill = "synchronic tone")

f0_tri_ct_yas %>%
  filter(startsWith(hist_tone2, "yin")) %>%
  distri_prop(hist_tone2, sync_tone2, syntax)+
  labs(fill = "synchronic tone")

f0_tri_ct_yas %>%
  filter(startsWith(hist_tone3, "yin")) %>%
  distri_prop(hist_tone3, sync_tone3, syntax)+
  labs(fill = "synchronic tone")

f0_tri_ct_yas %>%
  filter(startsWith(hist_tone2, "yang")) %>%
  distri_prop(hist_tone2, sync_tone2, syntax)+
  labs(fill = "synchronic tone")

f0_tri_ct_yas %>%
  filter(startsWith(hist_tone3, "yang")) %>%
  distri_prop(hist_tone3, sync_tone3, syntax)+
  labs(fill = "synchronic tone")

unique(f0_tri_ct_yas$syntax)
## [1] "1+2MH" "1+2VO" "2+1VL"
f0_tri_ct_yas_12mh <- f0_tri_ct_yas %>% filter(syntax == "1+2MH")
f0_tri_ct_yas_21vl <- f0_tri_ct_yas %>% filter(syntax == "2+1VL")
f0_tri_ct_yas_12vo <- f0_tri_ct_yas %>% filter(syntax == "1+2VO")

# 1+2MH
f0_tri_ct_yas_12mh %>% 
 filter(sync_tone1 == 'LHL') %>% 
  distri_prop2(hist_tone23_mapped, sandhi_tone)

# 2+1 VL
f0_tri_ct_yas_21vl %>% 
 filter(sync_tone1 == 'LHL') %>% 
  distri_prop2(hist_tone23_mapped, sandhi_tone)

f0_tri_ct_yas_21vl %>% 
 filter(sync_tone1 == 'LH') %>% 
  distri_prop2(hist_tone23_mapped, sandhi_tone)

# 1+2VO
f0_tri_ct_yas_12vo %>% 
 filter(sync_tone1 == 'LHL') %>% 
  distri_prop2(hist_tone23_mapped, sandhi_tone)

f0_tri_ct_yas_12vo %>% 
 filter(sync_tone1 == 'LH') %>% 
  distri_prop2(hist_tone23_mapped, sandhi_tone)